"""
下载抖音短视频和评论：
不管怎么设防，只要出现在我的浏览器里面，我都能下载到。
知识点：selenium、browsermob-proxy

下载 bmp: http://bmp.lightbody.net/
使用 bmp 需要安装 Java

"""

from browsermobproxy import Server as BMPServer
from selenium.webdriver import ChromeOptions
from seleniumrequests import Chrome
import brotli
import base64
import json

# Machine-specific locations of the BrowserMob Proxy launcher and the
# chromedriver binary, plus the port passed to the BMP server.
bmp_server_path = r'D:\2020fall\juicy\release\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat'
bmp_port = 9312
chrome_driver_path = r'C:\Users\what\Downloads\chromedriver.exe'

# Start the BrowserMob Proxy server and create a proxy on it.
bmp = BMPServer(path=bmp_server_path, options={'port': bmp_port})
bmp.start()
proxy = bmp.create_proxy()

# Begin a HAR capture named 'douyin' that records headers as well as text
# and binary bodies, so the traffic can be inspected later.
har_capture_options = {
    'captureHeaders': True,
    'captureContent': True,
    'captureBinaryContent': True,
}
proxy.new_har('douyin', options=har_capture_options)

# Send Chrome's traffic through the proxy and tell it to ignore certificate
# errors (presumably needed because the proxy intercepts HTTPS traffic).
chrome_opts = ChromeOptions()
for chrome_arg in (
    f'--proxy-server={proxy.proxy}',
    '--ignore-certificate-errors',
    'ignore-urlfetcher-cert-requests',
):
    chrome_opts.add_argument(chrome_arg)

# Launch the browser and open the site whose traffic is being captured.
chrome = Chrome(executable_path=chrome_driver_path, options=chrome_opts)
chrome.get('http://douyin.com')

# Read the HAR a single time. Each access to `proxy.har` fetches the capture
# from the proxy again, so taking one snapshot avoids a redundant round trip
# and guarantees that both lists below are filtered from the same data.
har_entries = proxy.har['log']['entries']

# Comments: captured requests to the web comment-list endpoint.
comments = [
    entry for entry in har_entries
    if entry['request']['url'].startswith(
        'https://www.douyin.com/aweme/v1/web/comment/list/?device_platform=webapp')
]

# Videos: captured requests whose URL contains both 'video' and '/?a='.
videos = [
    entry for entry in har_entries
    if 'video' in entry['request']['url'] and '/?a=' in entry['request']['url']
]

# Download the first captured video through the browser-backed session and
# save it as 'v.mp4' in the current directory.
if videos:
    # stream=True makes iter_content() stream the body chunk by chunk
    # instead of loading the whole video into memory before writing.
    r = chrome.request('GET', videos[0]['request']['url'], stream=True)
    try:
        if r.ok:
            with open('v.mp4', 'wb') as f:
                for chunk in r.iter_content(chunk_size=2048):
                    f.write(chunk)
        else:
            # Do not save an HTTP error body under a .mp4 name.
            print('video download failed: HTTP {}'.format(r.status_code))
    finally:
        # A streamed response holds its connection until it is closed.
        r.close()
else:
    # Nothing matched the video filter; indexing videos[0] would raise a
    # bare IndexError, so report the situation and skip the download.
    print('no video request captured; skipping download')


# Parsed JSON body of every captured comment-list response.
comment_responses = []

# Decode each captured comment response into a Python object.
for comment in comments:
    content = comment['response']['content']
    text = content.get('text')
    if not text:
        # No body was recorded for this entry, so there is nothing to decode.
        continue
    if content.get('encoding') == 'base64':
        # A HAR entry marks a base64-encoded body with encoding == 'base64'.
        # The decoded bytes are brotli-compressed (presumably stored raw
        # because the proxy could not decompress them itself).
        body = brotli.decompress(base64.b64decode(text))
    else:
        # The body was stored as plain text and can be parsed directly.
        body = text
    comment_responses.append(json.loads(body))

